1 Load libraries

library(limma)
library(edgeR)
library(ggplot2)
library(RColorBrewer)
library(stringr)
library(ggrepel)
library(plotly)
library(processx)
library(biomaRt)
library(WGCNA)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggpubr)
library(dplyr)
library(igraph)
library(DT)
library(networkD3)

2 Importing data to the environment

#IMR90 normalized expression matrix =========
df<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/imr90_wgcna/IMR90_edat.RDS")
IMR90_ER_edat<-df[,grep("ER", colnames(df))]
IMR90_GFP_edat<-df[,grep("GFP", colnames(df))]
IMR90_ER_GFP_edat<-cbind(IMR90_GFP_edat, IMR90_ER_edat)

3 IMR90 data processing

#ER_samples =======================
df_er<-df[, grep("ER", colnames(df))]

df_er.pca = prcomp(t(df_er), center = TRUE, scale = TRUE)
group_er<-factor(c(rep("ER_0", 3), rep("ER_4", 3), rep("ER_8", 3), 
                   rep("ER_12", 3), rep("ER_16", 3), rep("ER_20", 3), 
                   rep("ER_24", 3), rep("ER_32", 3), rep("ER_40", 2), rep("ER_48",3)),
                   levels = c("ER_0", "ER_4", "ER_8", "ER_12", "ER_16", "ER_20",
                              "ER_24", "ER_32", "ER_40", "ER_48"))

df_er.plot=data.frame(df_er.pca$x[,1],df_er.pca$x[,2],group_er, rownames(df_er.pca$x))
colnames(df_er.plot)=c("PC1","PC2","group_er","cond")
rownames(df_er.plot)=rownames(df_er.pca$x)
percentage_er <- round(df_er.pca$sdev / sum(df_er.pca$sdev) * 100, 2)
percentage_er_lab <- paste(colnames(df_er.plot), "(", paste( as.character(percentage_er), "%", ")", sep=""))

#GFP_samples =========================
df_gfp<-df[, grep("GFP", colnames(df))]

df_gfp.pca = prcomp(t(df_gfp), center = TRUE, scale = TRUE)
group_gfp<-factor(c(rep("GFP_0", 3), rep("GFP_4", 2), rep("GFP_8", 3), 
                   rep("GFP_12", 1), rep("GFP_16", 3), rep("GFP_20", 3), 
                   rep("GFP_24", 3), rep("GFP_32", 3), rep("GFP_40", 3), rep("GFP_48",3)),
                   levels = c("GFP_0", "GFP_4", "GFP_8", "GFP_12", "GFP_16", "GFP_20",
                              "GFP_24", "GFP_32", "GFP_40", "GFP_48"))

df_gfp.plot=data.frame(df_gfp.pca$x[,1],df_gfp.pca$x[,2],group_gfp, rownames(df_gfp.pca$x))
colnames(df_gfp.plot)=c("PC1","PC2","group_gfp","cond")
rownames(df_gfp.plot)=rownames(df_gfp.pca$x)
percentage_gfp <- round(df_gfp.pca$sdev / sum(df_gfp.pca$sdev) * 100, 2)
percentage_gfp_lab <- paste(colnames(df_gfp.plot), "(", paste( as.character(percentage_gfp), "%", ")", sep=""))

4 Plotting 3D PCA plot of IMR90 ER and GFP samples

tot_explained_variance_ratio_er <- summary(df_er.pca)[["importance"]]['Proportion of Variance',]
tot_explained_variance_ratio_er<- 100 * sum(tot_explained_variance_ratio_er)

tit_er<-paste0("Total Explained Variance = ", tot_explained_variance_ratio_er, "\n PCA of normalized IMR90 ER samples")

components_er<-data.frame(df_er.pca$x[,1],df_er.pca$x[,2],df_er.pca$x[,3], group_er, rownames(df_er.pca$x))
colnames(components_er)<-c("PC1","PC2", "PC3", "group_er", "sample_names")

axx <- list(
  title = paste0("PC1 (", percentage_er[1], ")" ))
axy <- list(
  title = paste0("PC2 (", percentage_er[2], ")" ))

axz <- list(
  title = paste0("PC3 (", percentage_er[3], ")" ))

fig_er <- plot_ly(components_er, x = ~PC1, y = ~PC2, z = ~PC3, color = ~group_er, colors = c('#636EFA','#EF553B','#00CC96')) %>%
  add_markers(size = 28)


fig_er <- fig_er %>%
  layout(
    title = tit_er,
    scene = list(bgcolor = "white", 
                 xaxis=axx, 
                 yaxis=axy, 
                 zaxis=axz)
    )

fig_er
tot_explained_variance_ratio_gfp <- summary(df_gfp.pca)[["importance"]]['Proportion of Variance',]
tot_explained_variance_ratio_gfp<- 100 * sum(tot_explained_variance_ratio_gfp)

tit_gfp<-paste0("Total Explained Variance = ", tot_explained_variance_ratio_gfp, "\n PCA of normalized IMR90 GFP samples")

components_gfp<-data.frame(df_gfp.pca$x[,1],df_gfp.pca$x[,2],df_gfp.pca$x[,3], group_gfp, rownames(df_gfp.pca$x))
colnames(components_gfp)<-c("PC1","PC2", "PC3", "group_gfp", "sample_names")

axx <- list(
  title = paste0("PC1 (", percentage_gfp[1], ")" ))

axy <- list(
  title = paste0("PC2 (", percentage_gfp[2], ")" ))

axz <- list(
  title = paste0("PC3 (", percentage_gfp[3], ")" ))


fig_gfp <- plot_ly(components_gfp, x = ~PC1, y = ~PC2, z = ~PC3, color = ~group_gfp, colors = c('#636EFA','#EF553B','#00CC96')) %>%
  add_markers(size = 28)


fig_gfp <- fig_gfp %>%
  layout(
    title = tit_gfp,
    scene = list(bgcolor = "white",
                 xaxis=axx, 
                 yaxis=axy, 
                 zaxis=axz))

fig_gfp

5 Diffierentially expressed genes analysis of IMR90 ER and GFP samples at 48h

group_48h_DEGs<-factor(c(rep("GFP_48",3), rep("ER_48",3)), levels = c("GFP_48", "ER_48"))
design<-model.matrix(~0 + group_48h_DEGs)
IMR90_48h_ER_GFP<-df[,c(grep("GFP_48",colnames(df)), grep("ER_48", colnames(df)))]
comparison = "group_48h_DEGsER_48 - group_48h_DEGsGFP_48"
cont=makeContrasts(comparison,levels=design)
fit=lmFit(IMR90_48h_ER_GFP,design)
vfit=contrasts.fit(fit,contrasts=cont)
efit<- eBayes(vfit)
DEGs_48h<-topTreat(efit, sort.by = "P", n = Inf)
dim(DEGs_48h)
## [1] 13870     6
DEGs_48h_sig_gene_list<-DEGs_48h[abs(DEGs_48h$logFC)>=0 & DEGs_48h$adj.P.Val <= 0.05,]
dim(DEGs_48h_sig_gene_list)
## [1] 10513     6
IMR90_ER_sig_DEGs<-df[rownames(DEGs_48h_sig_gene_list), grep("ER", colnames(df))]
dim(IMR90_ER_sig_DEGs)
## [1] 10513    29

6 Gene co-expression analysis by WGCNA with IMR90 ER samples

#Clustering by WGCNA (IMR90 48h ER vs.GFP DEGs with abs lfc > = 0 & padj <= 0.05,  total 10513 genes) 

nettype = "signed"
powers = c(c(1:10), seq(from = 12, to=20, by=2))
IMR90_ER_expression_data<-IMR90_ER_sig_DEGs

sft_ER=pickSoftThreshold(t(IMR90_ER_expression_data), dataIsExpr = TRUE, powerVector = powers, networkType = nettype, verbose = 5)

# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft_ER$fitIndices[,1], -sign(sft_ER$fitIndices[,3])*sft_ER$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab= paste0("Scale Free Topology Model Fit ", nettype, " R^2"), type="n",
     main = paste("Scale independence"))+text(sft_ER$fitIndices[,1], -sign(sft_ER$fitIndices[,3])*sft_ER$fitIndices[,2],
     labels=powers,cex=1,col="red")+abline(h=0.90,col="red")

# Mean connectivity as a function of the soft-thresholding power
plot(sft_ER$fitIndices[,1], sft_ER$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))+text(sft_ER$fitIndices[,1], sft_ER$fitIndices[,5], labels=powers, cex=1, col="red")


##Create co-expression matrix=========================
cor <- WGCNA::cor
net_ER = blockwiseModules(
  t(IMR90_ER_expression_data),
  power = 16 , #https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/faq.html Q#6 or based on powerEstimate
  TOMType = "signed", 
  minModuleSize = 30,   # We like large modules, so we set the minimum module size relatively high
  reassignThreshold = 0, 
  mergeCutHeight = 0.25,
  numericLabels = TRUE, 
  pamRespectsDendro = FALSE,
  saveTOMs = F,
  verbose = 3
)
table(net_ER$colors)
cor<-stats::cor
#saveRDS(net_ER, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/imr90_wgcna/WGCNA_net_ER.rds")

6.1 Gene modules analysis on IMR90 ER samples

IMR90_ER_expression_data<-IMR90_ER_sig_DEGs
net_ER<-readRDS(file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/imr90_wgcna/WGCNA_net_ER.rds")
sizeGrWindow(12,10)
moduleLabels = net_ER$colors
mergedColors = labels2colors(net_ER$colors)
plotDendroAndColors(net_ER$dendrograms[[1]], mergedColors[net_ER$blockGenes[[1]]],
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
reference_df<-data.frame(gene = names(moduleLabels), number = unname(moduleLabels), color = mergedColors)

MEs = net_ER$MEs;
geneTree = net_ER$dendrograms[[1]];

# Module identification using dynamic tree cut:
minModuleSize = 30
dynamicMods = cutreeDynamic(dendro = geneTree,
                            deepSplit = 2, 
                            pamRespectsDendro = FALSE,
                            minClusterSize = minModuleSize);
## Warning in cutreeDynamic(dendro = geneTree, deepSplit = 2, pamRespectsDendro = FALSE, : cutreeDynamic: method "hybrid" requires a valid dissimilarity matrix "distM". 
## Defaulting to method "tree".
table(dynamicMods)
## dynamicMods
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
## 174 494 370 283 237 192 186 181 174 148 144 143 141 129 102  98  97  94  90  89 
##  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39 
##  81  80  78  76  74  73  70  65  63  62  55  55  53  53  53  52  49  47  45  45 
##  40  41  42  43  44 
##  43  42  41  40  39
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
## dynamicColors
##           black            blue           brown            cyan       darkgreen 
##             181             370             283             102              78 
##        darkgrey     darkmagenta  darkolivegreen      darkorange         darkred 
##              74              53              53              70              80 
##   darkturquoise     floralwhite           green     greenyellow            grey 
##              76              39             192             143             174 
##          grey60           ivory       lightcyan      lightcyan1      lightgreen 
##              94              40              97              41              90 
## lightsteelblue1     lightyellow         magenta   mediumpurple3    midnightblue 
##              42              89             148              43              98 
##          orange      orangered4   paleturquoise            pink           plum1 
##              73              45              55             174              45 
##          purple             red       royalblue     saddlebrown          salmon 
##             144             186              81              62             129 
##         sienna3         skyblue        skyblue3       steelblue             tan 
##              52              63              47              55             141 
##       turquoise          violet           white          yellow     yellowgreen 
##             494              53              65             237              49
# Plot the dendrogram and colors underneath
#sizeGrWindow(10,8)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05,
                    main = "Gene dendrogram and module colors")

# Module-trait(timepoint) relationship for ER samples
head(IMR90_ER_expression_data)
##          ER_0_1   ER_0_2   ER_0_3   ER_4_1   ER_4_2   ER_4_3   ER_8_1   ER_8_2
## PLXDC2 1.893141 2.312867 2.282067 2.699412 2.702465 2.493034 3.146952 3.288309
## ASF1B  5.072493 5.114994 5.061236 5.106026 5.050805 4.880512 5.392721 5.372936
## KIF2C  3.974835 3.900262 3.848738 3.789110 3.756419 3.980666 4.209206 4.106902
## ANLN   6.498003 6.671398 6.638677 6.631437 6.601337 6.478345 6.784324 6.799676
## PCNA   6.502892 6.504741 6.515332 6.657774 6.616888 6.684128 6.886018 6.898885
## RRM2   6.707925 6.774455 6.836511 6.816400 6.856196 6.800998 7.130278 7.116562
##          ER_8_3  ER_12_1  ER_12_2  ER_12_3  ER_16_1  ER_16_2  ER_16_3  ER_20_1
## PLXDC2 3.194628 3.362010 3.412058 3.366088 3.081144 3.157554 3.266120 3.042571
## ASF1B  5.383395 5.919668 5.829346 5.911272 6.315556 6.296359 6.359931 6.409018
## KIF2C  4.310285 4.476799 4.647568 4.624842 4.717777 4.751112 4.812691 5.440505
## ANLN   6.830324 7.120443 7.149276 7.156529 7.527337 7.559166 7.588323 8.038873
## PCNA   6.794903 7.141812 7.138863 7.189974 7.285442 7.257352 7.306676 7.255316
## RRM2   7.084269 7.625058 7.632879 7.701954 8.118555 8.118053 8.134556 8.486527
##         ER_20_2  ER_20_3  ER_24_1  ER_24_2  ER_24_3  ER_32_1  ER_32_2  ER_32_3
## PLXDC2 3.281604 3.046997 2.665396 2.615856 2.757200 2.695371 2.611128 2.134501
## ASF1B  6.496424 6.407333 6.382894 6.446343 6.382189 6.197166 6.458424 6.392571
## KIF2C  5.412143 5.474927 5.725647 5.698976 5.704604 5.477043 5.575120 5.582321
## ANLN   8.057891 8.136565 8.235175 8.310818 8.368891 8.364972 8.106027 7.791679
## PCNA   7.279115 7.220253 7.204059 7.146118 7.164607 7.358832 7.313197 7.352046
## RRM2   8.558498 8.549568 8.704232 8.699960 8.718489 9.020252 8.887162 8.749808
##         ER_40_1  ER_40_3  ER_48_1  ER_48_2  ER_48_3
## PLXDC2 2.028296 2.133444 2.070650 2.124552 2.204936
## ASF1B  6.662957 6.410296 6.172017 6.288384 6.160422
## KIF2C  5.590952 5.458114 5.347758 5.355094 5.431088
## ANLN   7.260375 8.061378 8.018595 7.960232 8.062980
## PCNA   7.383814 7.308021 7.452807 7.411484 7.407103
## RRM2   8.562631 8.816505 8.809535 8.687479 8.808142
design_ER<-colnames(IMR90_ER_expression_data)
design_ER<-unlist(lapply(design_ER, function(x) substring(x, 1, nchar(x)-2)))
design_ER<-gsub("_48", "_t5",design_ER)
design_ER<-gsub("_40", "_t5",design_ER)
design_ER<-gsub("_32", "_t4",design_ER)
design_ER<-gsub("_24", "_t4",design_ER)
design_ER<-gsub("_20", "_t3",design_ER)
design_ER<-gsub("_16", "_t3",design_ER)
design_ER<-gsub("_12", "_t2",design_ER)
design_ER<-gsub("_8", "_t2",design_ER)
design_ER<-gsub("_4", "_t1",design_ER)
design_ER<-gsub("_0", "_t1",design_ER)
design_er = model.matrix(~0 + design_ER)
colnames(design_er) = levels(as.factor(design_ER))

moduleTraitCor = cor(MEs, design_er, use = "p")
nSamples = ncol(t(IMR90_ER_expression_data))
#moduleTraitPvalue_student = corPvalueStudent(moduleTraitCor, nSamples) #Student asymptotic p-value for correlation
moduleTraitPvalue_fisher = corPvalueFisher(moduleTraitCor, nSamples) #Fisher's asymptotic p-value for correlation

sizeGrWindow(10,6)
# Display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
                   signif(moduleTraitPvalue_fisher, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(5, 5, 5, 5));

# Display the correlation values within a heatmap
colnames(MEs)<-substr(colnames(MEs), 3, nchar(colnames(MEs)))
colnames(MEs)<-paste0("Module ", colnames(MEs))
#moduleTraitCor<-moduleTraitCor[,factor(c("ER_0", "ER_early", "ER_middle", "ER_late"), levels = c("ER_0", "ER_early", "ER_middle", "ER_late"))]
moduleTraitCor<-moduleTraitCor[,factor(c("ER_t1", "ER_t2", "ER_t3", "ER_t4", "ER_t5"), levels = c("ER_t1", "ER_t2", "ER_t3", "ER_t4", "ER_t5"))]
rownames(moduleTraitCor)<-paste0("Module_", unlist(lapply(rownames(moduleTraitCor), function(x) substr(x, 3, nchar(x)))))

#pdf(file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/figure/IMR90_WGCNA_Module-time_period_relationships.pdf")
labeledHeatmap(Matrix = moduleTraitCor,
               #xLabels = c("ER_0", "ER_early", "ER_middle", "ER_late"),
               xLabels = c("ER_t1", "ER_t2", "ER_t3", "ER_t4", "ER_t5"),
               yLabels = names(MEs),
               ySymbols = names(MEs),
               colorLabels = FALSE,
               colors = blueWhiteRed(100),
               textMatrix = textMatrix,
               setStdMargins = T,
               cex.text = 0.4,
               zlim = c(-1,1),
               main = paste("IMR90_ER: Module-time period relationships"))
#dev.off()
# plot Eigengene adjacency heatmap
#pdf(file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/figure/IMR90_WGCNA_Eigengene_adjacency_heatmap.pdf")
plotEigengeneNetworks(MEs, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle = 30)
#dev.off()

6.2 Eigengene analysis on IMR90 ER samples

# 1. Performing Eigengene analysis on gene modules across time in IMR90 ER samples
ER_time_point<-c("ER_0", "ER_4", "ER_8", 
                 "ER_12", "ER_16", "ER_20", 
                 "ER_24", "ER_32", "ER_40", 
                 "ER_48")

MEs_mean<-data.frame(modules = rownames(t(MEs)))
for (i in 1:length(ER_time_point)){
  time_point<-ER_time_point[i]
  sub_df<-as.data.frame(t(MEs)[, grep(time_point, colnames(t(MEs)))])
  sub_df[,"mean"]<-apply(sub_df, 1, mean)
  df_mean<-as.data.frame(sub_df[, "mean"])
  colnames(df_mean)<-time_point
  MEs_mean<-cbind(MEs_mean, df_mean)
}

rownames(MEs_mean)<-MEs_mean$modules
MEs_mean<-MEs_mean[,-1]
MEs_mean_df<-reshape2::melt(as.matrix(MEs_mean))

colnames(MEs_mean_df)<-c("Module", "time", "Eigengene")
MEs_mean_df$time<-unlist(lapply(as.character(MEs_mean_df$time), function(x) strsplit(x, "_")[[1]][2]))
MEs_mean_df$time<-as.numeric(MEs_mean_df$time)
MEs_mean_df$Eigengene<-as.numeric(MEs_mean_df$Eigengene)

theme<-theme(panel.background = element_blank(),
             panel.border=element_rect(fill=NA),
             panel.grid.major = element_blank(),
             panel.grid.minor = element_blank(),
             strip.background=element_blank(),
             axis.text.x=element_text(colour="black"),
             axis.text.y=element_text(colour="black"),
             axis.ticks=element_line(colour="black"),
             plot.margin=unit(c(1,1,1,1),"line"),
             axis.title.x = element_text(color="black", size=20, face="bold"),
             axis.title.y = element_text(color="black", size=20, face="bold"),
             axis.text = element_text(size = 20, face = "bold"))

for (i in 1:length(rownames(MEs_mean))){
  module<-rownames(MEs_mean)[i]
  df_module<-MEs_mean_df[MEs_mean_df$Module == module,]
  p<-ggplot(df_module, 
       aes(x=factor(time, levels = c(0,4,8,12,16,20,24,32,40,48)),
           y=Eigengene, colour=Module, group=Module, fill=Module)) +
  geom_line(size =0.1)+
  geom_point(size=0.5)+
  ylim(-0.4, 0.4)+
  labs(x = 'time (h)', y = "Eigengene",title = 'Eigengenes of the each WGCNA modules')
  #plot(p)
}
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# 2. Putting the dynamic Eigengene curve of each module into groups based on the distance of modules

  p1<-ggplot(MEs_mean_df[MEs_mean_df$Module %in% c("Module 14", "Module 1", "Module 11"), ],
      mapping = aes(x=factor(time, levels = c(0,4,8,12,16,20,24,32,40,48)), y=Eigengene, group = Module, fill = Module)) +
  geom_line(aes(color=Module), size = 0.8)+
  geom_point(size=2, shape = 21)+
  scale_fill_brewer(palette = "Blues")+
  scale_color_brewer(palette = "Blues")+
  ylim(-0.4, 0.4)+
  labs(x = 'time (h)', y = "Eigengene")

  p1<-p1+theme

  p2<-ggplot(MEs_mean_df[MEs_mean_df$Module %in% c("Module 13", "Module 5", "Module 10", "Module 12"), ],
      mapping = aes(x=factor(time, levels = c(0,4,8,12,16,20,24,32,40,48)), y=Eigengene, group = Module, fill = Module)) +
  geom_line(aes(color=Module), size = 0.8)+
  geom_point(size=2, shape = 21)+
  scale_fill_brewer(palette = "Greens")+
  scale_color_brewer(palette = "Greens")+
  ylim(-0.4, 0.4)+
  labs(x = 'time (h)', y = "Eigengene")

  p2<-p2+theme
  
  p3<-ggplot(MEs_mean_df[MEs_mean_df$Module %in% c("Module 4", "Module 3", "Module 8"), ],
      mapping = aes(x=factor(time, levels = c(0,4,8,12,16,20,24,32,40,48)), y=Eigengene, group = Module, fill = Module)) +
  geom_line(aes(color=Module), size = 0.8)+
  geom_point(size=2, shape = 21)+
  scale_fill_brewer(palette = "Oranges")+
  scale_color_brewer(palette = "Oranges")+
  ylim(-0.4, 0.4)+
  labs(x = 'time (h)', y = "Eigengene")

  p3<-p3+theme
  
  p4<-ggplot(MEs_mean_df[MEs_mean_df$Module %in% c("Module 6", "Module 9", "Module 7", "Module 2"), ],
      mapping = aes(x=factor(time, levels = c(0,4,8,12,16,20,24,32,40,48)), y=Eigengene, group = Module, fill = Module)) +
  geom_line(aes(color=Module), size = 0.8)+
  geom_point(size=2, shape = 21)+
  scale_fill_brewer(palette = "Purples")+
  scale_color_brewer(palette = "Purples")+
  ylim(-0.4, 0.4)+
  labs(x = 'time (h)', y = "Eigengene")

  p4<-p4+theme

  WGCNA_Eigengenes_plot<-ggarrange(p1, p2, p3, p4, 
              ncol = 2,
              nrow = 2
) 
 WGCNA_Eigengenes_plot

#pdf(file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/figure/IMR90_WGCNA_Eigengenes_dynamics.pdf", width = 12, height = 8)
#WGCNA_Eigengenes_plot
#dev.off()

6.3 GO-terms analysis on gene modules from IMR90 ER samples

#genemart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
#saveRDS(genemart, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/genemart.rds")
genemart<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/imr90_wgcna/genemart.rds")
probes = colnames(t(IMR90_ER_expression_data))
g_list<-getBM(filters = "hgnc_symbol", 
                       attributes= c("ensembl_gene_id","hgnc_symbol", "entrezgene_id"),values=probes,mart= genemart)
gene_list<-list()
for (i in 1:length(unique(moduleLabels))){
  number<-unique(moduleLabels)[i]
  module_name<-paste0("Module_", number)
  modnumber<-probes[moduleLabels == number]
  df_genes<-g_list[g_list$hgnc_symbol %in% modnumber,"ensembl_gene_id"]
  gene_list[[module_name]]<-df_genes
}
#names(gene_list)<-paste0("Module_", unique(moduleLabels))

gene_df<-list()
module_name_list<-paste0("Module_", c(1:14))
for (i in 1:14){
  module_name<-module_name_list[i] #remove Module_0, since it's the module with genes that do not co-express with others
  genes<-gene_list[[module_name]]
  df_genes<-g_list[g_list$ensembl_gene_id %in% genes,]
  df_genes<-df_genes[!duplicated(df_genes$hgnc_symbol),]
  gene_df[[module_name]]<-df_genes
}
##GO-Term enrichment based on WGCNA network================================
WGCNA_modules_go_result<-list()
for (i in 1:length(gene_df)){
  module_name<-names(gene_df)[i]
  genes<-gene_df[[module_name]]
  genes<-genes$hgnc_symbol
  em_ER_BP<-enrichGO(genes, 
                     OrgDb = org.Hs.eg.db, 
                     ont = "BP",
                   pAdjustMethod = "BH", 
                     pvalueCutoff = 0.05,
                     qvalueCutoff = 0.25, 
                     keyType = 'SYMBOL')
 WGCNA_modules_go_result[[module_name]]<-em_ER_BP@result
}
#saveRDS(WGCNA_modules_go_result, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/IMR90_WGCNA_modules_go_result.RDS")

6.4 Significant Go terms visualization for WGCNA modules

WGCNA_modules_go_result<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/imr90_wgcna/IMR90_WGCNA_modules_go_result.RDS")

color_pal<-c(brewer.pal(n = 3, "Blues"), brewer.pal(n = 4, "Greens"), brewer.pal(n = 3, "Oranges"), brewer.pal(n = 4, "Purples"))
sig_go_results_list_order<-paste0("Module_", c(14,1,11,13,5,10,12,4,3,8,6,9,7,2))

color_pal_new<-color_pal[c(1:9,11,13:14)]


sig_go_results_df<-data.frame()
for (i in 1:14){
  module<-as.character(sig_go_results_list_order[i])
  go_result<-WGCNA_modules_go_result[[module]]
  go_result[,"module"]<-module
  go_result<-go_result[go_result$p.adjust <= 0.05, ]
  go_result<-go_result[order(as.numeric(go_result$p.adjust), decreasing = F), ][1:20,]
  sig_go_results_df<-rbind(sig_go_results_df, go_result)
}
sig_go_results_df<-na.omit(sig_go_results_df)
sig_go_results_df<-sig_go_results_df[!duplicated(sig_go_results_df$Description),]
#write.csv(sig_go_results_df, file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/table/IMR90_WGCNA_modules_sig_go_result.csv")

p<-ggplot(data=sig_go_results_df, aes(x=factor(Description, levels = sig_go_results_df$Description), y= -log10(p.adjust)), fill = module)+
    geom_col(aes(fill = module), colour = "black", width=0.85, position=position_dodge(0.5)) +
    theme(axis.text.x = element_text(size = 10, angle=90, vjust=.5, hjust=1, face = "bold"),
           panel.background = element_blank(),
           legend.position= c(0.13, 0.7),
           legend.direction = "horizontal",
           legend.title = element_text(colour="black", size=20, 
                                    face="bold"),
           legend.text = element_text(colour="black", size=15, 
                                     face="bold"),
           legend.background = element_rect(fill="grey90",
                                  linewidth =0.5, linetype="solid"),
           axis.text.y = element_text(size = 15, face = "bold"),
           plot.title = element_text(size = 20, face = "bold"),
           axis.title = element_text(size = 20, face = "bold"),
           axis.line = element_line(colour = "black"))+
     ylab("-log10(p.adjust)") + 
     xlab("GO Terms")+
     scale_y_continuous(breaks = seq(0, -log10(min(sig_go_results_df$p.adjust))*1.0, by = 5))+
     scale_fill_manual(values=color_pal_new, limits = c(unique(sig_go_results_df$module)))+
     scale_x_discrete(labels = function(x) str_wrap(x, width = 65))
p

#pdf(file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/figure/IMR90_WGCNA_Modules_GO_enrichment.pdf", width = 35, height = 15)
#plot(p)
#dev.off()

6.5 Network centrality analysis and network visualization of gene modules from WGCNA

#adjMatrix <- adjacency(t(IMR90_ER_expression_data), power = 16, type = "signed")
#TOM = TOMsimilarity(adjMatrix)
#saveRDS(TOM, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/WGCNA_TOM.rds")

TOM = readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/imr90_wgcna/WGCNA_TOM.rds")
probes = colnames(t(IMR90_ER_expression_data))
dimnames(TOM)<-list(probes, probes)

# 1.Selecting top hub genes from each gene module
network_hub_list<-list()
nTOM = 40 #top nodes from the WGCNA network
for (i in 1:length(gene_df)){
  module<-names(gene_df)[i]
  genes<-gene_df[[module]][,"hgnc_symbol"]
  m<-TOM[genes, genes]
  hubs<-names(sort(rowSums(m), decreasing = T)[1:nTOM])
  network_hub_list[[module]]<-hubs
}

network_hub_list # The top 40 hubs in each module
## $Module_1
##  [1] "NIBAN2"  "CAPN1"   "INF2"    "MGAT4B"  "VPS28"   "FLOT2"   "FKBP8"  
##  [8] "FAM3A"   "GNB2"    "CDIPT"   "G6PD"    "METRN"   "RNPEPL1" "EHD2"   
## [15] "APRT"    "TSPAN4"  "FBXW5"   "PACS2"   "CAPG"    "TUBGCP2" "MAPK3"  
## [22] "BLVRB"   "SLC27A1" "VAT1"    "GNAI2"   "IGFBP6"  "DBNL"    "PIP5K1C"
## [29] "SPON2"   "MVP"     "IFITM3"  "ERCC2"   "PGLS"    "LY6E"    "NISCH"  
## [36] "TGFB1I1" "VEGFB"   "GSN"     "NPDC1"   "TSC2"   
## 
## $Module_2
##  [1] "FANCD2"   "MAD2L1"   "SMARCC1"  "CDK1"     "UCHL5"    "RRM2"    
##  [7] "SKA3"     "SMC4"     "CIP2A"    "NDC1"     "RRM2P3"   "CENPU"   
## [13] "NCAPG2"   "CENPN"    "ZNF714"   "HMGB2"    "PAICS"    "CBX1"    
## [19] "MELK"     "DDIAS"    "NDC80"    "SMC2"     "FANCI"    "CSE1L"   
## [25] "RAD51AP1" "NUP107"   "RFC5"     "SPC25"    "CCNB2"    "UBE2T"   
## [31] "CEP55"    "PBK"      "RFC3"     "SINHCAF"  "POLR3G"   "PAICSP4" 
## [37] "MTHFD2"   "DHX9"     "PHACTR4"  "MTF2"    
## 
## $Module_3
##  [1] "NUP153"  "NUP155"  "PPAT"    "BRCA2"   "HS2ST1"  "NAA50"   "ATAD5"  
##  [8] "MMS22L"  "SMCHD1"  "NUP50"   "XPOT"    "SLC16A1" "UTP20"   "SEH1L"  
## [15] "QSER1"   "ABCE1"   "NOC3L"   "ELOVL5"  "SASS6"   "NPAT"    "ZNF850" 
## [22] "IPO7"    "PDS5B"   "NUP58"   "DNA2"    "ATAD2"   "CHD1"    "AHCTF1" 
## [29] "HAUS6P1" "USP1"    "SCML2"   "NUDT21"  "KLHL23"  "ZNF678"  "PLOD2"  
## [36] "LRRC8B"  "ENC1"    "FMR1"    "MSH2"    "XPO4"   
## 
## $Module_4
##  [1] "OSBPL8"   "RLIM"     "RO60"     "CAMSAP2"  "ZFYVE16"  "DNAJB14" 
##  [7] "LTN1"     "ITCH"     "CACNA2D1" "NHLRC2"   "UBR3"     "ZBTB41"  
## [13] "TAF2"     "ZNF451"   "ERBIN"    "SEL1L"    "ATP11B"   "SOCS4"   
## [19] "NEDD4"    "EXOC5"    "ZNF800"   "TMF1"     "ARHGAP5"  "SLC16A7" 
## [25] "ITGAV"    "ACSL4"    "SMAD5"    "BMPR1A"   "MBNL1"    "EDEM1"   
## [31] "ZNF148"   "ATP7A"    "MOB1B"    "MDFIC"    "MAN2A1"   "SP3"     
## [37] "REST"     "TNKS2"    "MAN1A2"   "MARCHF6" 
## 
## $Module_5
##  [1] "THSD4"   "LMF1"    "IGFBP5"  "APCDD1L" "KCNMA1"  "H6PD"    "PHLDB1" 
##  [8] "CPA4"    "EPHB2"   "SIDT2"   "TNS1"    "SEMA7A"  "CD9"     "NPTX1"  
## [15] "MYPN"    "MYADM"   "GALNT10" "CCND1"   "SLC17A5" "GM2A"    "CDCP1"  
## [22] "BACE1"   "PLBD2"   "MGAT5"   "DHRS7"   "CXCL12"  "TGM2"    "SLC16A4"
## [29] "KREMEN1" "FBLN5"   "FAM120B" "LTBP2"   "MBTPS1"  "FOLR3"   "GALNT11"
## [36] "PTPRA"   "MEAK7"   "PSG4"    "LBH"     "WNT5A"  
## 
## $Module_6
##  [1] "RPS3AP6"  "RPS3AP26" "RPS3AP5"  "RPS3A"    "RPL9"     "RPS3AP21"
##  [7] "RPL9P7"   "RPL17P36" "RPS3AP20" "RPL7P9"   "RPL15"    "RPL7P32" 
## [13] "RPS3AP25" "RPL4P4"   "RPS3AP47" "RPL7"     "RPL17P34" "RPL6"    
## [19] "RPL5"     "RPL39"    "RPL5P4"   "RPL7P26"  "RPL4"     "SNHG5"   
## [25] "SF3B6"    "RPL7P1"   "RPL7P15"  "RPL5P34"  "RPL7P6"   "RPL7P24" 
## [31] "RPL39P3"  "RPS20P14" "RPL6P27"  "RPS23"    "RPS25"    "RPL17P43"
## [37] "RPS7P11"  "RPS3AP49" "RPL24P7"  "RPS4X"   
## 
## $Module_7
##  [1] "ALYREF"     "SMPD4"      "TK1"        "MAD2L2"     "CHTF18"    
##  [6] "SAE1"       "TOMM40"     "DTYMK"      "PPM1G"      "KHSRP"     
## [11] "TRAF4"      "SLC25A29"   "DCAF15"     "HNRNPUL1"   "UBN1"      
## [16] "FBL"        "HNRNPA1P63" "TCF3"       "TOMM40P4"   "SAFB"      
## [21] "LRRC20"     "LMNB2"      "CPSF4"      "TOMM40P1"   "RNPS1P1"   
## [26] "POP7"       "SGTA"       "RBM19"      "SAFB2"      "EZR"       
## [31] "TOMM34"     "H1-10"      "SNHG7"      "ITPKA"      "CNIH2"     
## [36] "CDKN2D"     "LAS1L"      "RANP1"      "ARRB2"      "TRAF2"     
## 
## $Module_8
##  [1] "USP12"    "PAK2"     "RPRD1A"   "ATP11C"   "UBA6"     "PANK3"   
##  [7] "ARPP19"   "SMARCAD1" "TNPO1"    "MAPK6"    "USP15"    "PAPOLA"  
## [13] "MYSM1"    "MARCHF7"  "PPP4R2"   "TAB2"     "ZNF518B"  "RP2"     
## [19] "DNM1L"    "STAG2"    "KIF5B"    "MOB1A"    "QKI"      "ITGA4"   
## [25] "LYPLA1"   "PGAP1"    "PPP6R3"   "PTPN4"    "MORC3"    "UBQLN1"  
## [31] "FAM91A1"  "AHR"      "SPRED1"   "PCNX4"    "AIDAP1"   "TTLL7"   
## [37] "SOAT1"    "BCLAF1P2" "CGGBP1"   "SSX2IP"  
## 
## $Module_9
##  [1] "FANCA"   "HAS3"    "POLE"    "SMC1A"   "SLC1A4"  "SLC29A1" "NUP188" 
##  [8] "LZTS1"   "RNF227"  "PTBP1"   "TCF19"   "TFDP1"   "MYO19"   "TMEM201"
## [15] "MICAL3"  "AKAP1"   "SCAF4"   "CDCA4"   "ICMT"    "CDT1"    "NAA40"  
## [22] "NPHP4"   "SAP130"  "GEMIN4"  "TMEM109" "TONSL"   "PASK"    "TLN2"   
## [29] "FHOD3"   "CAD"     "GPRIN1"  "NUP214"  "MCM2"    "UTP4"    "POFUT1" 
## [36] "MANEAL"  "DPF1"    "MCM5"    "CABLES2" "ACACB"  
## 
## $Module_10
##  [1] "DSP"        "CSGALNACT1" "CDH6"       "TRPC6"      "ANKLE2"    
##  [6] "CHST11"     "CCN2"       "CREB3L2"    "SIRPA"      "SMOC1"     
## [11] "B4GALT1"    "FLRT2"      "BHLHE40"    "AKAP12"     "NOX4"      
## [16] "ADAM19"     "F3"         "KCNH1"      "TFPI2"      "MYOCD"     
## [21] "EDN1"       "IL11"       "PCNX1"      "TCF7"       "DCBLD1"    
## [26] "CRISPLD2"   "S1PR1"      "TIMP3"      "COL4A1"     "ATP2A2"    
## [31] "SLC22A23"   "ABHD2"      "SIRPB1"     "HIVEP2"     "PTGS2"     
## [36] "TEX2"       "CSTF2T"     "UCK2"       "PRSS23"     "KLF7"      
## 
## $Module_11
##  [1] "VARS1"    "PPP1R12C" "GNA11"    "SRM"      "ARF1"     "BCAR1"   
##  [7] "AP1M1"    "MRPL4"    "PGP"      "PLCB3"    "IMPDH1"   "AACS"    
## [13] "CNN2"     "FARSA"    "PSMG4"    "BOP1"     "DHX30"    "TRIM11"  
## [19] "ALDH16A1" "MFN2"     "ATAD3C"   "SIVA1"    "GTPBP1"   "CCDC9"   
## [25] "CNP"      "ATAD3B"   "KLF16"    "SMTN"     "SMARCA4"  "CNOT3"   
## [31] "MBD3"     "THOP1"    "KIAA2013" "ALKBH5"   "PREB"     "PRADC1"  
## [37] "TTLL12"   "MTA1"     "C19orf25" "APBA3"   
## 
## $Module_12
##  [1] "CBARP"    "CHMP1B"   "DACT1"    "TNFRSF1B" "INHBA"    "STC1"    
##  [7] "PMEPA1"   "IL1R1"    "PRPSAP1"  "TMEM120B" "SOX4"     "CBLB"    
## [13] "RASD2"    "ETS2"     "NR4A3"    "SLC16A6"  "CREM"     "NFATC2"  
## [19] "MAFK"     "PITX2"    "MSC"      "NEDD9"    "LIF"      "NGF"     
## [25] "PDGFA"    "PPP1R3C"  "IL4R"     "SPHK1"    "KLHL26"   "MAP3K4"  
## [31] "SSBP3"    "NKX3-1"   "SHROOM2"  "NHS"      "NFATC1"   "F2RL1"   
## [37] "TENT5A"   "MECOM"    "PITPNC1"  "NR4A2"   
## 
## $Module_13
##  [1] "CHRM2"      "CPED1"      "DSEL"       "SNX18"      "PRRX1"     
##  [6] "PRKCA"      "CPEB2"      "IGF2BP3"    "NRP1"       "CCDC50"    
## [11] "CAMK2D"     "CDC42EP3"   "PCDH18"     "FGD4"       "ELK3"      
## [16] "DENND10P1"  "CDC42EP3P1" "LIX1L"      "PRKAR1AP1"  "TBCK"      
## [21] "VCPIP1"     "NBEA"       "LINC00674"  "WASF3"      "WDR19"     
## [26] "SNAI2"      "PLAG1"      "SEC22B3P"   "SEC22B2P"   "FMN2"      
## [31] "PPP1R21"    "SATB1"      "RUNX2"      "FNIP1"      "NPEPPS"    
## [36] "PDPK1"      "MTCO1P2"    "ARL2BP"     "YWHAG"      "NOP56"     
## 
## $Module_14
##  [1] "KRTAP2-4" "KRTAP2-2" "KRTAP2-1" "KRTAP2-3" "FOSL1P1"  "FOSL1"   
##  [7] "PRAG1"    "FZD2"     "BCAR3"    "SEC14L1"  "NAMPT"    "CTH"     
## [13] "NAMPTP1"  "OSR1"     "PRDM1"    "PTPRE"    "AGFG2"    "DVL2"    
## [19] "MVK"      "AREG"     "FZD7"     "RYBP"     "FOSB"     "ATP8B1"  
## [25] "TMEM87A"  "TRAK1"    "FRY"      "NR3C2"    "JCAD"     "GRB10"   
## [31] "RBMS2P1"  "AMMECR1L" "CBX7"     "ZFAND5"   "ACVR1B"   "MAMSTR"  
## [37] "ELMO2"    "RIPK2"    "PIK3R1"   "SASH1"
# 2. Generating adjacency matrix for hub genes from all modules
hub_genes<-unname(unlist(network_hub_list))
hubs_m<-TOM[hub_genes, hub_genes]
edges<-data.frame(from=rownames(hubs_m)[row(hubs_m)[upper.tri(hubs_m)]], 
           to=colnames(hubs_m)[col(hubs_m)[upper.tri(hubs_m)]], 
           value=hubs_m[upper.tri(hubs_m)])

# 3. Setting up thredshold for selecting leading edges in the network
leading_edges_ratio = 0.20 #top 1/5 edges based on edge weight.
threshold = quantile(edges$value, probs=1-leading_edges_ratio , na.rm = FALSE)
edges<-edges[edges$value >= threshold,]

# 4. Organizing vertices data frame and edges data frame for the network
vertices<-data.frame()
for(i in 1:length(network_hub_list)){
  module_name<-names(network_hub_list)[i]
  v<-data.frame(name = network_hub_list[[i]], module = rep(module_name, nTOM), size = rep(8, nTOM))
  vertices<-rbind(vertices, v)
}

vertices[,"group"]<-unlist(lapply(vertices$module, function(x) strsplit(x, "_")[[1]][2]))
vertices[,"id"]<-rownames(vertices)


# 5. Centrality analysis of hub genes co-expression network
g<-graph_from_data_frame(edges, directed = F, vertices)
eigenvector<-eigen_centrality(g)$vector
betweenness<-betweenness(g)
degree<-degree(g)
closeness<-closeness(g)
centralities <- cbind(degree, eigenvector, betweenness, closeness)
centrality_df<-cbind(centralities, vertices$module)
#write.csv(centrality_df, file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/table/IMR90_WGCNA_hubs_netwrok_centrality_analysis.csv")
DT::datatable(centrality_df) #show centrality data of nodes for sorting
# 6. Visualize the hub genes co-expression network
edges[,"source"]<-vertices[match(edges$from, vertices$name), "id"]
edges[,"target"]<-vertices[match(edges$to, vertices$name), "id"]
edges$source<-as.numeric(edges$source)
edges$target<-as.numeric(edges$target)

edges<-data.frame(source = edges$source-1, target = edges$target-1, value = edges$value)
vertices<-cbind(vertices, centralities)
vertices[,"betweenness_sqrt"]<-sqrt(vertices$betweenness)

color_df<-data.frame(color = color_pal, module = c(14,1,11,13,5,10,12,4,3,8,6,9,7,2))
color_df<-color_df[order(as.numeric(sub("\\D+", "", color_df$module))),] #match the module color with previous figures.

ColourScale <- 'd3.scaleOrdinal()
            .domain(["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14"])
           .range(["#9ECAE1", "#6A51A3", "#FDAE6B", "#FEE6CE", "#BAE4B3", "#F2F0F7", "#9E9AC8", "#E6550D", "#CBC9E2", "#74C476", "#3182BD", "#238B45", "#EDF8E9", "#DEEBF7"]);'

fn<-forceNetwork(Links = edges, Nodes = vertices,
                 Source = "source", Target = "target",
                 Nodesize = "betweenness_sqrt", fontSize = 12,
                 Value = "value", NodeID = "name",
                 Group = "group", opacity = 1,
                 legend = T, charge = -20, zoom = F,
                 opacityNoHover = 1,
                 colourScale= JS(ColourScale),
                 linkWidth = JS("function(d) { return Math.cbrt(d.value)*0.4; }")
                 )
fn